import scrapy
from scrapy.http.response.html import HtmlResponse


class AnzhiSpider(scrapy.Spider):
    """Crawl anzhi.com video-app search results and yield APK download URLs.

    Each scraped item is a ``{'file_urls': [...]}`` dict suitable for
    Scrapy's FilesPipeline; crawling continues by following every link on
    the page (OffsiteMiddleware + ``allowed_domains`` keeps it on-site).
    """

    name = "anzhi"
    start_urls = ['http://www.anzhi.com/search.php?keyword=%E8%A7%86%E9%A2%91']
    base_url = 'http://www.anzhi.com'
    allowed_domains = ['anzhi.com']

    def parse(self, response: HtmlResponse):
        """Extract APK ids from the page and schedule follow-up pages.

        Args:
            response: a search-results or listing page from anzhi.com.

        Yields:
            ``{'file_urls': [url]}`` dicts for each APK found, and
            ``scrapy.Request`` objects for further on-site pages.
        """
        # APK ids appear as the numeric argument of inline onclick
        # handlers that start with "open".
        apk_ids = response.selector.xpath(
            '//@onclick[starts-with(., "open")]').re(r'\d+')
        for apk_id in apk_ids:
            yield {'file_urls': ['http://www.anzhi.com/dl_app.php?s=' + apk_id]}

        # Follow every link on the page (pagination and listings alike).
        for href in response.selector.xpath('//@href').getall():
            if href.startswith('/'):
                next_page = self.base_url + href
            # BUG FIX: original tested `'anzhi.com' in hrefs` (the whole
            # list), so absolute on-site URLs were never followed.
            elif 'anzhi.com' in href:
                next_page = href
            else:
                continue
            if not next_page.startswith('http://'):
                next_page = 'http://' + next_page
            try:
                yield scrapy.Request(next_page, callback=self.parse)
            except ValueError:
                # scrapy.Request raises ValueError for malformed URLs.
                # BUG FIX: original called self.settings.error(), which
                # does not exist (Settings has no .error method) and
                # would itself raise inside a bare `except:`.
                self.logger.error('failed to add %s as next request', href)
